
#Code for "Concealing Disease: Trade and Travel Barriers and the Timeliness of Outbreak Reporting" 
#International Studies Perspectives
#Catherine Z. Worsnop, University of Maryland School of Public Policy, cworsnop@umd.edu

#Install and load old version of Zelig in order to run Cox PH models
install.packages("Zelig", repos = "http://r.iq.harvard.edu/archived/", type = "source")

library(Zelig)
library(gee)
library(mvtnorm)
library(ZeligChoice)
library(cem)
library(foreign)
library(coxme)
library(survival)
library(xtable)

#Load data

(load("Concealing Disease data.RData"))

#########
#Table 1#
#########

#Model 1

Capacity<- zelig(Surv(startreportdays1,report) ~
                    loghealth+loginter+ihr_inforce+year, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(Capacity)

#Model 2

Costs<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
                year, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(Costs)

#Model 3

Full<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
              year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(Full)

#Model 4

RoL<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
              year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+ihr_inforce*rolwb, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(RoL)

###############################################################
##Figure 1 and quantitites of interest mentioned in the paper##
###############################################################

#Trade

set.seed(02143)
x.low<-setx(Costs, logtrade=quantile(na.omit(timing1$logtrade))[2])
x.high<-setx(Costs,  logtrade=quantile(na.omit(timing1$logtrade))[4])
s.out1<-sim(Costs,x=x.low, x1=x.high)
summary(s.out1)

costs.low<-quantile(s.out1$qi$hr, .025)
costs.high<-quantile(s.out1$qi$hr, .975)
costs.est<-mean(s.out1$qi$hr)

1/costs.est

#Domestic opposition

set.seed(02143)
x.low<-setx(Costs, domesticopp=0, )
x.high<-setx(Costs,  domesticopp=1)
s.out1<-sim(Costs,x=x.low, x1=x.high)
summary(s.out1)

domesticopp.low<-quantile(s.out1$qi$hr, .025)
domesticopp.high<-quantile(s.out1$qi$hr, .975)
domesticopp.est<-mean(s.out1$qi$hr)

1/domesticopp.est

#Internet coverage

set.seed(02143)
x.low<-setx(Costs,  loginter=quantile(na.omit(timing1$loginter))[2])
x.high<-setx(Costs,  loginter=quantile(na.omit(timing1$loginter))[4])
s.out1<-sim(Costs,x=x.low, x1=x.high)
summary(s.out1)

internet.low<-quantile(s.out1$qi$hr, .025)
internet.high<-quantile(s.out1$qi$hr, .975)
internet.est<-mean(s.out1$qi$hr)

#Health spending

set.seed(02143)
x.low<-setx(Costs, loghealth=quantile(na.omit(timing1$loghealth))[2])
x.high<-setx(Costs,  loghealth=quantile(na.omit(timing1$loghealth))[4])
s.out1<-sim(Costs,x=x.low, x1=x.high)
summary(s.out1)

health.low<-quantile(s.out1$qi$hr, .025)
health.high<-quantile(s.out1$qi$hr, .975)
health.est<-mean(s.out1$qi$hr)

#Rule of Law and IHR

##Before IHR in force
set.seed(02143)
x.low<-setx(RoL, ihr_inforce=0, rolwb=quantile(na.omit(timing1$rolwb))[2])
x.high<-setx(RoL,  ihr_inforce=0, rolwb=quantile(na.omit(timing1$rolwb))[4])
s.out1<-sim(RoL,x=x.low, x1=x.high)
summary(s.out1)

preIHR.low<-quantile(s.out1$qi$hr, .025)
preIHR.high<-quantile(s.out1$qi$hr, .975)
preIHR.est<-mean(s.out1$qi$hr)

1/preIHR.est

##After IHR in force
set.seed(02143)
x.low<-setx(RoL, ihr_inforce=1, rolwb=quantile(na.omit(timing1$rolwb))[2])
x.high<-setx(RoL,  ihr_inforce=1, rolwb=quantile(na.omit(timing1$rolwb))[4])
s.out1<-sim(RoL,x=x.low, x1=x.high)
summary(s.out1)

postIHR.low<-quantile(s.out1$qi$hr, .025)
postIHR.high<-quantile(s.out1$qi$hr, .975)
postIHR.est<-mean(s.out1$qi$hr)

#####################
##Plotting Figure 1##
#####################

par(mai=c(0.7,0.7,.1,.2), mgp=c(2.5,1,0))

xaxis<-c(1,1.5,2,2.5)
points<-c(costs.est,domesticopp.est, internet.est, health.est)

plot(xaxis,points, ylim=c(0,2.2),xlim=c(.8,3),yaxt='n', xaxt='n', yaxs="i",type="n", cex.main=0.70, cex.lab=0.90, main="", xlab="", ylab="")

# lines(x=c(1,1),c(agri.high,agri.low), type='l', lty=1, lwd=2.5, col="gray68")
# points(1,costs.est, pch= 20)

lines(x=c(1,1),c(costs.high,costs.low), type='l', lty=1, lwd=2.5, col="gray68")
points(1,costs.est, pch= 20)
abline(h=1,lty=2, lwd=.5)
lines(x=c(1.6,1.6),c(domesticopp.low, domesticopp.high), type='l', lty=1, lwd=2.5, col="gray68")
points(1.6,domesticopp.est, pch= 20, col="black", bg="white")
# lines(x=c(2.2,2.2),c(ihreffectnoihr.high,ihreffectnoihr.low), type='l', lty=1, lwd=2.5, col="gray68")
# points(2.2,ihreffectnoihr.est, pch= 19)
# lines(x=c(2.8,2.8),c(ihreffectihr.high,ihreffectihr.low), type='l', lty=1, lwd=2.5, col="gray68")
# points(2.8,ihreffect.est, pch= 19)
lines(x=c(2.2,2.2),c(internet.high,internet.low), type='l', lty=1, lwd=2.5, col="gray68")
points(2.2,internet.est, pch= 20)
lines(x=c(2.8,2.8),c(health.high,health.low), type='l', lty=1, lwd=2.5, col="gray68")
points(2.8,health.est, pch= 20)

axis(2,seq(0,2.2,.2),labels=seq(0,2.2,.2), cex.axis=.8)
axis(1,at=c(1,1.6,2.2,2.8),labels=c( "high/low trade","high/low domestic opp.","high/low internet", "high/low health exp."), cex.axis=.8)
mtext("Hazard ratio of reporting", side=2, line=2.1, cex=1)
# mtext("Percentage of Countries in Region Imposing Barriers ", side=1, line=2.1, cex=1)
legend(.9, .3, c("95% confidence interval"), cex=0.8, lwd=c(2.5), lty=c(1), col=c("gray68"), bty="n")
# legend(4, -.5, c(expression(bolditalic('N ')* bold("= 194 states")), "95% confidence interval"), cex=0.8, lwd=c(0, 2.5), lty=c(0,1), col=c("black", "gray58"))


###########
#Figure 2##
###########

set.seed(02143)
seq<-seq(-9,4.5, by=1.5)

fd.est=c()
ci.low=c()
ci.high=c()

for (i in seq){
  t2m2<-zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
                year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+loginter*logtravtourgdp, data=timing1, model="coxph", cluster="region", robust=TRUE)
  x.low<-setx(t2m2, logtravtourgdp=quantile(na.omit(timing1$logtravtourgdp))[2],loginter=i)
  x.high<-setx(t2m2, logtravtourgdp=quantile(na.omit(timing1$logtravtourgdp))[4], loginter=i)
  s.out1<-sim(t2m2,x=x.low, x1=x.high)
  
  fd<-s.out1$qi$hr
  
  fd.est<-cbind(fd.est, mean(fd))
  ci.low<-cbind(ci.low, quantile(fd, .025))
  ci.high<-cbind(ci.high,quantile(fd, .975))
  
}


#######################
#PLOTTING FIGURE 2#####
#######################

plot(seq,fd.est,xaxt='n', yaxs="i",yaxt='n', cex.main=0.70, cex.lab=1, main="" , lwd=1.5, type="n", ylim=c(.5,3.5),xlim=c(-10,5),xlab="", ylab="")
for (i in 1:10){
  lines(x=c(seq[i],seq[i]),c(ci.low[i], ci.high[i]), type='l', lty=1, lwd=2.5, col="gray58" )}
points(seq,fd.est, pch= 20, cex=1.3)
abline(h=1,lty=2, lwd=.5)
axis(1,seq, round(exp(seq)*10, 2), cex.axis=.8)
axis(2,seq(.5,3.5,.5), cex.axis=.8)
mtext("Hazard ratio of reporting", side=2, line=2.5, cex=.8)
mtext("Internet users/1000 population", side=1, line=2.5, cex=.8)
legend(-10, 3.3, c("95% confidence interval"), cex=0.8, lwd=c(2.5), lty=c(1), col=c("gray58"), bty="n")

########################
#Supplementary material#
########################

###########
#Table S1.# 
###########

# Model 1 controls for whether disease spreads through direct human-to-human transmission. 

direct<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
               year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+direct, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(direct)

# Model 2 controls for whether the country experienced an outbreak previously in the dataset. 

outbreak<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
               year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+outbreakdum, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(outbreak)

# Model 3 uses only health spending as a measure of surveillance capacity, excluding internet coverage. 

nointer<- zelig(Surv(startreportdays1,report) ~ loghealth+logtrade+domesticopp+ihr_inforce+
                  year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(nointer)

# Model 4 uses the Human Development Index (HDI) as a measure of surveillance capacity instead of health spending and internet coverage (I exclude GDP per capita from the model because HDI includes gross national income in the measure).

hdi<- zelig(Surv(startreportdays1,report) ~ hdi+logtrade+domesticopp+ihr_inforce+
               year+logagri+logtravtourgdp+rolwb+polity2, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(hdi)

###########
#Table S2.#
###########

# Model 1 includes the WHO region in which the outbreak occurs as a control (reference category is the Africa region), while still clustering for standard errors by WHO region. 

region<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
               year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+region, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(region)

# Model 2 clusters standard errors by country while still accounting for regional effects by including region again as a control (reference category is the Africa region). 

countrycl<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
               year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+region, data=timing1, model="coxph", cluster="country", robust=TRUE)
summary(countrycl)

# Model 3 investigates the conditional relationship between the size of the travel and tourism sector and surveillance capacity, showing a positive interaction between the two (standard errors clustered by region).

travtour<- zelig(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
                   year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+loginter*logtravtourgdp, data=timing1, model="coxph", cluster="region", robust=TRUE)
summary(travtour)

###########
#Table S3.#
###########

# Model 1

regionfr<- coxme(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
                    year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+(1|region), data=timing1)
              
summary(regionfr)

# Model 2

countryfr<- coxme(Surv(startreportdays1,report) ~ loghealth+loginter+logtrade+domesticopp+ihr_inforce+
                    year+logagri+logtravtourgdp+rolwb+polity2+logGDPPC+region+(1|country), data=timing1)

summary(countryfr)

####################################
# Table S4. Descriptive statistics##
####################################

data1<-timing1[c("startreportdays1","loghealth", "loginter", "logtrade", "domesticopp", "ihr_inforce")] 
                 
data2<-timing1[c("year","logagri", "logtravtourgdp","rolwb","polity2","logGDPPC")]

xtable(summary(data1, digits=2))
xtable(summary(data2, digits=2)) 

###############################################
#Table S5. Countries with outbreak start date#
##############################################
timing1$missing<-ifelse(is.na(timing1$outbreakstart1)==TRUE, 1, 0)

notmissing<-as.matrix(unique(timing1$country[which(timing1$missing==0)]))
notmissing1<-notmissing[1:49]
notmissing2<-notmissing[50:98]
notmissing<-cbind(notmissing1, notmissing2)

#Table S6. Countries missing outbreak start date

missing<-as.matrix(unique(timing1$country[which(timing1$missing==1)]))
missing1<-missing[1:31]
missing2<-missing[32:62]
missing<-(cbind(missing1, missing2))

